#Replication data for "The Promises and Pitfalls of 311 Data", Urban Affairs Review
#Ariel White and Kris-Stella Trump
#email Ariel (arwhi@mit.edu) with questions
#Posted January 2017

rm(list=ls())
mainwd <- "/nfs/home/A/awhite/shared_space/subs_hsg/final311repdata_forposting/" #set your own accordingly
setwd(mainwd)

library(foreign)
allcalls <- read.csv("tractvolumes_allcalls.csv") #read in file made in NYC311_basic_comparisons.R script
head(allcalls)

require(sp) # the classes and methods that make up spatial ops in R
require(maptools) # tools for reading and manipulating spatial objects
require(mapdata) # includes good vector maps of world political boundaries.

##okay, now actually plot these on a background that will tell us something about the context.

#starting here: http://stackoverflow.com/questions/2078468/plotting-shapefiles-on-top-of-google-map-tiles
library(PBSmapping); library(RgoogleMaps); library(maptools)
shpFile <- system.file("shapes/sids.shp", package="maptools");  
  shp<-importShapefile(shpFile,projection="LL"); 

 shpFile <- paste0(mainwd, "NYtractshapefiles/tl_2010_36_tract10")  #"/nfs/home/A/awhite/shared_space/subs_hsg/final311repdata_forposting/NYtractshapefiles/tl_2010_36_tract10"
  shp<-importShapefile(shpFile,projection="LL");  
  bb <- qbbox(lat = shp[,"Y"], lon = shp[,"X"]);  
  MyMap <- GetMap.bbox(bb$lonR, bb$latR, destfile = "NYproblems.jpg");  
 # PlotPolysOnStaticMap(MyMap, shp, lwd=.5, col = "blue", add = F);

#oh, but I want to do this for only some of these.

#subset to missing ones
library(rgdal)
setwd(paste0(mainwd,"NYtractshapefiles/"))
df1 <-  readOGR(".", "tl_2010_36_tract10")
df <-  readOGR(".", "nyct2010") #switch to tract borders clipped to shoreline from here: http://www1.nyc.gov/site/planning/data-maps/open-data/districts-download-metadata.page
head(df@data)
#but then I'll need to line up the names differently so the subset works.
library(car)
df@data$countyfips <- Recode(df@data$BoroCode, "1='061'; 2='005';3='047';4='081';5='085'") 
df@data$GEOID10 <- paste("36", df@data$countyfips, df@data$CT2010, sep="")
subset <- df[df$GEOID10 %in% allcalls$TRACTID,]
dim(subset)
#plot(subset)
class(subset); class(df); dim(subset); dim(df)
writeOGR(subset, ".", "missingshapes", driver="ESRI Shapefile")

#now load and plot just those.
new = importShapefile("missingshapes.shp",projection="LL")

  bb <- qbbox(lat = new[,"Y"], lon = new[,"X"]);  
  MyMap <- GetMap.bbox(bb$lonR, bb$latR, destfile = "NYproblems.jpg");  


#okay: shade the tracts based on call volumes
#how to split? quartiles? 
#I'm sure there's a more efficient way, but here goes.
summary(allcalls$allyear_2010)
#wait, I also want to drop any tracts with pop < 250
dim(allcalls)
allcalls <- allcalls[allcalls$Tot_Population_CEN_2010 > 250,]
dim(allcalls)
q1cut <- round(summary(allcalls$allyear_2010)[2])
q2cut <- round(summary(allcalls$allyear_2010)[3])
q3cut <- round(summary(allcalls$allyear_2010)[5])
q4cut <- round(summary(allcalls$allyear_2010)[6])


calls1 <- allcalls[allcalls$allyear_2010< q1cut,]
calls2 <- allcalls[allcalls$allyear_2010> q1cut & allcalls$allyear_2010< q2cut,]
calls3 <- allcalls[allcalls$allyear_2010> q2cut & allcalls$allyear_2010< q3cut,]
calls4 <- allcalls[allcalls$allyear_2010> q3cut & allcalls$allyear_2010< q4cut,]

subset1 <- df[df$GEOID10 %in% calls1$TRACTID,]
subset2 <- df[df$GEOID10 %in% calls2$TRACTID,]
subset3 <- df[df$GEOID10 %in% calls3$TRACTID,]
subset4 <- df[df$GEOID10 %in% calls4$TRACTID,]

#oh wait, also want to try to overlay borough outlines (to deal with the water issue)
# pull from same NYC site as tracts.
boroughs <-  readOGR("../NYprecinctshapefiles", "nybb")

pdf(file=paste0(mainwd, "censustractcallvolumes_2010.pdf"))

plot(subset3,col=gray(.5), border=NA, main = "Total 311 Call Volumes By Tract, 2010 : Quartiles")
plot(subset1, col=gray(.9), add=T, border=NA)
plot(subset2, col=gray(.7), add=T, border=NA)
plot(subset4, col=gray(.3), add=T, border=NA)
plot(boroughs, border = "black", cex=5, lwd=1, bg="transparent", add=T)

temp <- legend("topleft", legend = c(paste("Q1:", 0, "-", q1cut, " calls"), paste("Q2:", q1cut, "-", q2cut, " calls"), paste("Q3:", q2cut, "-", q3cut, " calls"), paste("Q4:", q3cut, "-", q4cut, " calls")),
               #text.width = strwidth("1,000,000"),
               fill=c(gray(.9), gray(.7), gray(.5), gray(.3)), xjust = 1, yjust = 1)
               #title = "Line Types")
dev.off()

#okay, now we want to do exactly the same thing, but with pop-adjusted volumes.
head(allcalls)
allcalls$percapita2010 <- (allcalls$allyear_2010)/(allcalls$Tot_Population_CEN_2010)
allcalls$percapita2010[allcalls$percapita2010==Inf] <- NA
summary(allcalls$percapita2010) #18 NAs.

q1cut <- summary(allcalls$percapita2010)[2]
q2cut <- summary(allcalls$percapita2010)[3]
q3cut <- summary(allcalls$percapita2010)[5]
q4cut <- summary(allcalls$percapita2010)[6]

calls1 <- allcalls[allcalls$percapita2010< q1cut,]
calls2 <- allcalls[allcalls$percapita2010> q1cut & allcalls$percapita2010< q2cut,]
calls3 <- allcalls[allcalls$percapita2010> q2cut & allcalls$percapita2010< q3cut,]
calls4 <- allcalls[allcalls$percapita2010> q3cut & allcalls$percapita2010< q4cut,]

subset1 <- df[df$GEOID10 %in% calls1$TRACTID,]
subset2 <- df[df$GEOID10 %in% calls2$TRACTID,]
subset3 <- df[df$GEOID10 %in% calls3$TRACTID,]
subset4 <- df[df$GEOID10 %in% calls4$TRACTID,]


pdf(file=paste0(mainwd, "censustractcallvolumes_2010percapita.pdf"))

plot(subset3, col=gray(.5), border=NA, main = "Per-Capita 311 Call Volumes By Tract, 2010 : Quartiles")

plot(subset1, col=gray(.9), add=T, border=NA)
plot(subset2, col=gray(.7), add=T, border=NA)
plot(subset4, col=gray(.3), add=T, border=NA)
plot(boroughs, border = "black", cex=5, lwd=1, bg="transparent", add=T)
temp <- legend("topleft", legend = c(paste("Q1:", 0, "-", q1cut, " calls"), paste("Q2:", q1cut, "-", q2cut, " calls"), paste("Q3:", q2cut, "-", q3cut, " calls"), paste("Q4:", q3cut, "-", q4cut, " calls")),
               #text.width = strwidth("1,000,000"),
               fill=c(gray(.9), gray(.7), gray(.5), gray(.3)), xjust = 1, yjust = 1)
               #title = "Line Types")

dev.off()


#okay,and NOW we want to do something quite similar, but plotting "need" proxies: tract-level HH income, poverty measures.
#but, bummer, we don't have those so will need to merge in.  where did we get it for the main analysis?
#probably the census planning db (along with response rates)
#try pulling in the merged census tract data

library(foreign)
allcalls <- read.csv("../merged_NYCcensustractdata.csv") #read in

head(allcalls)
#allcalls$percapita2010 <- (allcalls$allyear_2010)/(allcalls$Tot_Population_CEN_2010)
#allcalls$percapita2010[allcalls$percapita2010==Inf] <- NA
#summary(allcalls$percapita2010) #18 NAs.

#start with median income
allcalls$medhhinc <- as.numeric(gsub("[,]", "", gsub("[$]", "", as.character(allcalls$Med_HHD_Inc_ACS_08_12)))) 

q1cut <- summary(allcalls$medhhinc)[2]
q2cut <- summary(allcalls$medhhinc)[3]
q3cut <- summary(allcalls$medhhinc)[5]
q4cut <- summary(allcalls$medhhinc)[6]

calls1 <- allcalls[allcalls$medhhinc< q1cut,]
calls2 <- allcalls[allcalls$medhhinc> q1cut & allcalls$medhhinc< q2cut,]
calls3 <- allcalls[allcalls$medhhinc> q2cut & allcalls$medhhinc< q3cut,]
calls4 <- allcalls[allcalls$medhhinc> q3cut & allcalls$medhhinc< q4cut,]

subset1 <- df[df$GEOID10 %in% calls1$TRACTID,]
subset2 <- df[df$GEOID10 %in% calls2$TRACTID,]
subset3 <- df[df$GEOID10 %in% calls3$TRACTID,]
subset4 <- df[df$GEOID10 %in% calls4$TRACTID,]


pdf(file=paste0(mainwd,"censustractHHincomes_2010.pdf"))

plot(subset3, col=gray(.5), border=NA, main = "Median Household Income by Tract, 2008-2012 ACS : Quartiles", ylim= c(127299.5,  276846.9)) #note the new bbox settings.

plot(subset1, col=gray(.9), add=T, border=NA)
plot(subset2, col=gray(.7), add=T, border=NA)
plot(subset4, col=gray(.3), add=T, border=NA)
plot(boroughs, border = "black", cex=5, lwd=1, bg="transparent", add=T)
temp <- legend("topleft", legend = c(paste("Q1: $", 0, "-", q1cut), paste("Q2: $", q1cut, "-", q2cut), paste("Q3: $", q2cut, "-", q3cut), paste("Q4: $", q3cut, "-", q4cut)),
               #text.width = strwidth("1,000,000"),
               fill=c(gray(.9), gray(.7), gray(.5), gray(.3)), xjust = 1, yjust = 1)
               #title = "Line Types")

dev.off()

#same thing for poverty rates

allcalls$pctpovertyACS <- allcalls$Prs_Blw_Pov_Lev_ACS_08_12/allcalls$Pov_Univ_ACS_08_12
summary(allcalls$pctpovertyACS)
#we were doing this wrong before. see data dictionary here: https://www.census.gov/research/data/planning_database/2014/docs/PDB_Tract_2014-11-20a.pdf

summary(allcalls$pct_Pov_Univ_ACS_08_12)

q1cut <- summary(allcalls$pctpovertyACS)[2]
q2cut <- summary(allcalls$pctpovertyACS)[3]
q3cut <- summary(allcalls$pctpovertyACS)[5]
q4cut <- summary(allcalls$pctpovertyACS)[6]

calls1 <- allcalls[allcalls$pctpovertyACS< q1cut,]
calls2 <- allcalls[allcalls$pctpovertyACS> q1cut & allcalls$pctpovertyACS< q2cut,]
calls3 <- allcalls[allcalls$pctpovertyACS> q2cut & allcalls$pctpovertyACS< q3cut,]
calls4 <- allcalls[allcalls$pctpovertyACS> q3cut & allcalls$pctpovertyACS< q4cut,]

subset1 <- df[df$GEOID10 %in% calls1$TRACTID,]
subset2 <- df[df$GEOID10 %in% calls2$TRACTID,]
subset3 <- df[df$GEOID10 %in% calls3$TRACTID,]
subset4 <- df[df$GEOID10 %in% calls4$TRACTID,]


pdf(file=paste0(mainwd, "censustractpovertyrates_2010.pdf"))

plot(subset4, col=gray(.3), border=NA, main = "Percent of Tract Residents Living in Poverty, ACS 08-12 : Quartiles",ylim=c(126299.5,  276846.9))
plot(subset1, col=gray(.9), add=T, border=NA)
plot(subset3, col=gray(.5), add=T, border=NA)
plot(subset2, col=gray(.7), add=T, border=NA)
plot(boroughs, border = "black", cex=5, lwd=1, bg="transparent", add=T)
temp <- legend("topleft", legend = c(paste("Q1:", 0, "-", q1cut), paste("Q2:", q1cut, "-", q2cut), paste("Q3:", q2cut, "-", q3cut), paste("Q4:", q3cut, "-", q4cut)),
               #text.width = strwidth("1,000,000"),
               fill=c(gray(.9), gray(.7), gray(.5), gray(.3)), xjust = 1, yjust = 1)
               #title = "Line Types")

dev.off()



